Necessary Packages
library(vegan)
library(ggplot2)
library(ggpubr)
library(pairwiseAdonis)
library(reshape2)
library(phyloseq)
library(userfriendlyscience)
library(microbiome)
library(themetagenomics)
library(RColorBrewer)
library(car)
library(betareg)
library(fitdistrplus)
library(emmeans)
library(multcompView)
Combine all similar taxa by ‘Genus’
CH_1_ALL<-read.csv(file ="~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_seqtab_ALL_COMBINED.csv", header = TRUE, check.names = FALSE, row.names = 1)
CH_1_ALL<-as.matrix(CH_1_ALL)
CH_1_ALL_otu<-otu_table(CH_1_ALL, taxa_are_rows = TRUE)
CH_1_ALL_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_ALL_Metadata.csv", header=TRUE, check.names = FALSE, row.names = 1)
CH_1_ALL_metadata<-sample_data(CH_1_ALL_metadata)
CH_1_taxa<-read.csv(file ="~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Taxa_Graphs/CH_1_Clean_seq_REFERENCE.csv", header = TRUE, check.names = FALSE, row.names = 1)
CH_1_taxa<-as.matrix(CH_1_taxa)
CH_1_taxa_Phylo<-tax_table(CH_1_taxa)
CH_1_ALL_Phylo<-phyloseq(CH_1_ALL_otu,CH_1_taxa_Phylo,CH_1_ALL_metadata)
CH_1_ALL_melted<-tax_glom(CH_1_ALL_Phylo, "Genus")
write.csv(otu_table(CH_1_ALL_melted), "CH_1_seqtab_ALL_COMBINED.csv")
ALL (H2O + NP) NMDS:
CH_1_ALL_ASV<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_seqtab_ALL_COMBINED.csv", header = TRUE, check.names = FALSE, row.names = 1)
CH_1_ALL_ASV_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_ALL_Metadata.csv", header=TRUE, check.names = FALSE)
CH_1_ALL_t_ASV<-as.data.frame(t(CH_1_ALL_ASV))
CH_1_ALL_ASV_min_depth<-min(colSums(CH_1_ALL_ASV))
CH_1_ALL_t_ASV_rarefied<-as.data.frame(round(rrarefy(CH_1_ALL_t_ASV,10835)))
CH_1_ALL_ASV_dist = as.matrix(vegdist(CH_1_ALL_t_ASV_rarefied, "bray"))
CH_1_ALL_ASV_NMDS = metaMDS(CH_1_ALL_ASV_dist)
## Run 0 stress 0.106868
## Run 1 stress 0.09902619
## ... New best solution
## ... Procrustes: rmse 0.0545745 max resid 0.3373512
## Run 2 stress 0.1101984
## Run 3 stress 0.09893444
## ... New best solution
## ... Procrustes: rmse 0.01040372 max resid 0.05190541
## Run 4 stress 0.1013031
## Run 5 stress 0.112083
## Run 6 stress 0.09927236
## ... Procrustes: rmse 0.010096 max resid 0.05008335
## Run 7 stress 0.09914968
## ... Procrustes: rmse 0.01664224 max resid 0.05253555
## Run 8 stress 0.1111629
## Run 9 stress 0.1014855
## Run 10 stress 0.1067653
## Run 11 stress 0.1071975
## Run 12 stress 0.1014456
## Run 13 stress 0.1013426
## Run 14 stress 0.1063761
## Run 15 stress 0.1118371
## Run 16 stress 0.09940842
## ... Procrustes: rmse 0.01363718 max resid 0.05126574
## Run 17 stress 0.1099479
## Run 18 stress 0.09881693
## ... New best solution
## ... Procrustes: rmse 0.008617948 max resid 0.05246118
## Run 19 stress 0.1070479
## Run 20 stress 0.111242
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
CH_1_ALL_ASV_MDS1 <-CH_1_ALL_ASV_NMDS$points[,1]
CH_1_ALL_ASV_MDS2<-CH_1_ALL_ASV_NMDS$points[,2]
CH_1_ALL_ASV_NMDS$stress
## [1] 0.09881693
CH_1_ALL_ASV_NMDS<-data.frame(MDS1=CH_1_ALL_ASV_MDS1, MDS2=CH_1_ALL_ASV_MDS2, Sample=CH_1_ALL_ASV_metadata$Sample, Location=CH_1_ALL_ASV_metadata$Location)
CH_1_ALL_ASV<-ggplot(CH_1_ALL_ASV_NMDS, aes(x = MDS1, y = MDS2, col = Location)) + geom_point() + stat_ellipse() + theme_classic() + scale_colour_manual(values = c("red", "black", "blue", "green")) + labs(title = "Cranberry vs. French Creek Microbial Communities", subtitle = "Northern Pike Gut Microbiota and Water Column Bacteria")
CH_1_ALL_ASV
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
pairwise.adonis(CH_1_ALL_ASV_dist, factors = CH_1_ALL_ASV_metadata$Location)
## pairs F.Model R2 p.value
## 1 Cranberry H2O vs French Creek H2O 4.979361 0.1993390 0.003
## 2 Cranberry H2O vs French Creek NP 357.432404 0.9520553 0.001
## 3 Cranberry H2O vs Cranberry NP 359.549912 0.9374267 0.001
## 4 French Creek H2O vs French Creek NP 436.650859 0.9604092 0.001
## 5 French Creek H2O vs Cranberry NP 410.057267 0.9447078 0.001
## 6 French Creek NP vs Cranberry NP 11.620734 0.3456419 0.001
## p.adjusted sig
## 1 0.018 .
## 2 0.006 *
## 3 0.006 *
## 4 0.006 *
## 5 0.006 *
## 6 0.006 *
H2O NMDS:
CH_1_H2O_ASV<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_seqs_taxa_H2O.csv", header = TRUE, check.names = FALSE, row.names = 1)
CH_1_H2O_ASV_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_H2O_metadata.csv", header=TRUE, check.names = FALSE)
CH_1_H2O_t_ASV<-as.data.frame(t(CH_1_H2O_ASV))
CH_1_H2O_ASV_min_depth<-min(colSums(CH_1_H2O_ASV))
CH_1_H2O_t_ASV_rarefied<-as.data.frame(round(rrarefy(CH_1_H2O_t_ASV, 10835)))
CH_1_H2O_ASV_dist = as.matrix(vegdist(CH_1_H2O_t_ASV_rarefied, "bray"))
CH_1_H2O_ASV_NMDS = metaMDS(CH_1_H2O_ASV_dist)
## Run 0 stress 0.1653655
## Run 1 stress 0.1623115
## ... New best solution
## ... Procrustes: rmse 0.1255831 max resid 0.4530973
## Run 2 stress 0.1720984
## Run 3 stress 0.220857
## Run 4 stress 0.2321761
## Run 5 stress 0.1623115
## ... Procrustes: rmse 2.237287e-05 max resid 7.061574e-05
## ... Similar to previous best
## Run 6 stress 0.1653655
## Run 7 stress 0.2403046
## Run 8 stress 0.2115946
## Run 9 stress 0.264751
## Run 10 stress 0.2295816
## Run 11 stress 0.1653656
## Run 12 stress 0.1805557
## Run 13 stress 0.2391431
## Run 14 stress 0.2195277
## Run 15 stress 0.2215675
## Run 16 stress 0.1623115
## ... Procrustes: rmse 1.427786e-05 max resid 4.307807e-05
## ... Similar to previous best
## Run 17 stress 0.1653656
## Run 18 stress 0.1635876
## Run 19 stress 0.1653662
## Run 20 stress 0.1623115
## ... Procrustes: rmse 1.184495e-05 max resid 3.938038e-05
## ... Similar to previous best
## *** Solution reached
CH_1_H2O_ASV_MDS1 <-CH_1_H2O_ASV_NMDS$points[,1]
CH_1_H2O_ASV_MDS2<-CH_1_H2O_ASV_NMDS$points[,2]
CH_1_H2O_ASV_NMDS$stress
## [1] 0.1623115
CH_1_H2O_ASV_NMDS<-data.frame(MDS1=CH_1_H2O_ASV_MDS1, MDS2=CH_1_H2O_ASV_MDS2, Sample=CH_1_H2O_ASV_metadata$Sample, Location=CH_1_H2O_ASV_metadata$Location, Position = CH_1_H2O_ASV_metadata$Position)
CH_1_H2O_ASV<-ggplot(CH_1_H2O_ASV_NMDS, aes(x = MDS1, y = MDS2, col = Location)) + geom_point() + stat_ellipse() + theme_classic() + scale_colour_manual(values = c("red", "black")) + labs(title = "Water Column Bacteria", subtitle = "Cranberry Creek vs. French Creek")
CH_1_H2O_ASV
#Significance by Location
pairwise.adonis(CH_1_H2O_ASV_dist, factors = CH_1_H2O_ASV_metadata$Location)
## pairs F.Model R2 p.value p.adjusted
## 1 Cranberry H2O vs French Creek H2O 5.707285 0.2220104 0.001 0.001
## sig
## 1 **
#Significance by Water Column Position
pairwise.adonis(CH_1_H2O_ASV_dist, factors = CH_1_H2O_ASV_metadata$Position)
## pairs F.Model R2 p.value p.adjusted sig
## 1 TOP vs BOTTOM 0.9511421 0.0453981 0.398 0.398
NP NMDS:
CH_1_NP_ASV<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_seqs_taxa_NP.csv", header = TRUE, check.names = FALSE, row.names = 1)
CH_1_NP_ASV_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_NP_metadata.csv", header=TRUE, check.names = FALSE)
CH_1_NP_t_ASV<-as.data.frame(t(CH_1_NP_ASV))
CH_1_NP_ASV_min_depth<-min(colSums(CH_1_NP_ASV))
CH_1_NP_t_ASV_rarefied<-as.data.frame(round(rrarefy(CH_1_NP_t_ASV,CH_1_NP_ASV_min_depth)))
CH_1_NP_ASV_dist = as.matrix(vegdist(CH_1_NP_t_ASV_rarefied, "bray"))
CH_1_NP_ASV_NMDS = metaMDS(CH_1_NP_ASV_dist)
## Run 0 stress 0.1785157
## Run 1 stress 0.2012448
## Run 2 stress 0.1757351
## ... New best solution
## ... Procrustes: rmse 0.09479133 max resid 0.2863075
## Run 3 stress 0.1753254
## ... New best solution
## ... Procrustes: rmse 0.1762159 max resid 0.4552027
## Run 4 stress 0.1805747
## Run 5 stress 0.3731032
## Run 6 stress 0.1877325
## Run 7 stress 0.1757365
## ... Procrustes: rmse 0.1762787 max resid 0.4632109
## Run 8 stress 0.2174771
## Run 9 stress 0.1763051
## Run 10 stress 0.1753209
## ... New best solution
## ... Procrustes: rmse 0.0007094411 max resid 0.002501052
## ... Similar to previous best
## Run 11 stress 0.1816692
## Run 12 stress 0.1832908
## Run 13 stress 0.1779428
## Run 14 stress 0.1862091
## Run 15 stress 0.1757334
## ... Procrustes: rmse 0.1762785 max resid 0.4637288
## Run 16 stress 0.1890442
## Run 17 stress 0.1759089
## Run 18 stress 0.1797656
## Run 19 stress 0.1790026
## Run 20 stress 0.1980181
## *** Solution reached
CH_1_NP_ASV_MDS1 <-CH_1_NP_ASV_NMDS$points[,1]
CH_1_NP_ASV_MDS2<-CH_1_NP_ASV_NMDS$points[,2]
CH_1_NP_ASV_NMDS$stress
## [1] 0.1753209
CH_1_NP_ASV_NMDS<-data.frame(MDS1=CH_1_NP_ASV_MDS1, MDS2=CH_1_NP_ASV_MDS2, Sample=CH_1_NP_ASV_metadata$Sample, Location=CH_1_NP_ASV_metadata$Location, Size = CH_1_NP_ASV_metadata$Size)
CH_1_NP_ASV<-ggplot(CH_1_NP_ASV_NMDS, aes(x = MDS1, y = MDS2, col = Location)) + geom_point() + stat_ellipse() + theme_classic()+ scale_colour_manual(values = c("red", "black")) + labs(title = "Northern Pike Gut Microbiota", subtitle = "Cranberry Creek vs. French Creek")
CH_1_NP_ASV
#Significance by Location
pairwise.adonis(CH_1_NP_ASV_dist, factors = CH_1_NP_ASV_metadata$Location)
## pairs F.Model R2 p.value p.adjusted sig
## 1 French Creek vs Cranberry 12.2461 0.3575911 0.001 0.001 **
#Significance by Fish Size
pairwise.adonis(CH_1_NP_ASV_dist, factors = CH_1_NP_ASV_metadata$Size)
## pairs F.Model R2 p.value p.adjusted sig
## 1 61-70 vs 51-60 2.7164437 0.31164587 0.080 0.80
## 2 61-70 vs 71-80 0.3452952 0.03694856 0.819 1.00
## 3 61-70 vs 81-90 1.2542426 0.13553163 0.314 1.00
## 4 61-70 vs 91-100+ 1.0390951 0.11495565 0.361 1.00
## 5 51-60 vs 71-80 2.3495664 0.25130218 0.095 0.95
## 6 51-60 vs 81-90 5.9256114 0.49688114 0.029 0.29
## 7 51-60 vs 91-100+ 1.7298797 0.22379129 0.260 1.00
## 8 71-80 vs 81-90 1.6601743 0.15573613 0.203 1.00
## 9 71-80 vs 91-100+ 1.2471671 0.12170848 0.298 1.00
## 10 81-90 vs 91-100+ 0.8860181 0.09970923 0.441 1.00
Initial Analysis is completed with biom-format and mothur commands using bash commands. Please refer to biom_mothur_code.txt in pathway ~/Dropbox/bdgallo_MS_SUNYESF/NON_R_Scripts for information
The final product fom these commands is CH_1_seqtab_ALL_COMBINED.groups.rarefaction
CH_1_ALLSEQS_Rare<-read.table("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_Rarefaction_Alpha_Diversity/CH_1_seqtab_ALL_COMBINED.groups.rarefaction", header = TRUE)
CH_1_ALLSEQS_Rare_DataColumnsPrimary<-CH_1_ALLSEQS_Rare[grepl("use", names(CH_1_ALLSEQS_Rare))]
CH_1_ALLSEQS_Rare_numSampled<-CH_1_ALLSEQS_Rare[, c("numsampled")]
CH_1_ALLSEQS_Rare<-cbind (CH_1_ALLSEQS_Rare_numSampled, CH_1_ALLSEQS_Rare_DataColumnsPrimary)
CH_1_ALLSEQS_Rare_long<-melt(CH_1_ALLSEQS_Rare, id.vars="CH_1_ALLSEQS_Rare_numSampled")
write.csv(CH_1_ALLSEQS_Rare_long, "CH_1_ALLSEQS_Rare_long.csv")
###Export .csv file and manually input sample locations using metadata###
CH_1_ALLSEQS_Rare_long<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_Rarefaction_Alpha_Diversity/CH_1_ALLSEQS_Rare_long.csv", header = TRUE)
options (scipen = 10000000)
CH_1_ALLSEQS_rarefaction<-ggplot(data=CH_1_ALLSEQS_Rare_long, aes(x = CH_1_ALLSEQS_Rare_numSampled, y = value, color = Location.Type)) + geom_line(aes(group=variable), size=1.5) + xlab("Number of Sequences") + ylab("ASVs (100%)") + labs(title = "Rarefaction Curves", subtitle = "Cranberry Creek and French Creek H2O and YOY Northern Pike") + theme_classic()+ scale_color_manual(values = c("black", "red", "blue", "green"))
CH_1_ALLSEQS_rarefaction
## Warning: Removed 13816 rows containing missing values (geom_path).
###Using CH_1_ALLSEQS_Rare_long.csv file, split up H2O and NP Samples into two separate .csv files###
CH_1_H2O_Rare_long<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_Rarefaction_Alpha_Diversity/CH_1_H2O_long.csv", header = TRUE)
CH_1_NP_Rare_long<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_Rarefaction_Alpha_Diversity/CH_1_NP_long.csv", header = TRUE)
CH_1_H2O_rarefaction<-ggplot(data=CH_1_H2O_Rare_long, aes(x = CH_1_ALLSEQS_Rare_numSampled, y = value, color = Location.Type)) + geom_line(aes(group=variable), size=1.5) + scale_color_manual(values = c("blue", "green", "pink")) + xlab("Number of Sequences") + ylab("ASVs (100%)") + labs(title = "Rarefaction Curves", subtitle = "Cranberry Creek and French Creek H2O") + theme_classic()
CH_1_H2O_rarefaction
## Warning: Removed 7731 rows containing missing values (geom_path).
CH_1_NP_rarefaction<-ggplot(data=CH_1_NP_Rare_long, aes(x = CH_1_ALLSEQS_Rare_numSampled, y = value, color = Location.Type)) + geom_line(aes(group=variable), size=1.5) + scale_color_manual(values = c("black", "red", "brown")) + xlab("Number of Sequences") + ylab("ASVs (100%)") + labs(title = "Rarefaction Curves", subtitle = "Cranberry Creek and French Creek Northern Pike") + theme_classic()
CH_1_NP_rarefaction
## Warning: Removed 6085 rows containing missing values (geom_path).
Note that taxa excel sheet was cleaned manually by removing all NA calls in Kingdom and Phylum columns. Additionally, any Kingdom tracing to Eukaryotes/Archaea were also removed
Normalized Top 10 Taxa Bar Graphs:
#'CH_1_ALL_melted' phyloseq object carried over from Chunk #2 above
CH_1_ALL_t_ASV_rarefied_t<-(t(CH_1_ALL_t_ASV_rarefied)) #Normalized ALL otu table
CH_1_ALL_Norm<-as.matrix(CH_1_ALL_t_ASV_rarefied_t)
CH_1_ALL_otu_Norm<-otu_table(CH_1_ALL_Norm, taxa_are_rows = TRUE)
CH_1_ALL_Phylo_Norm<-phyloseq(CH_1_ALL_otu_Norm,CH_1_taxa_Phylo,CH_1_ALL_metadata)
CH_1_ALL_melted_Norm<-tax_glom(CH_1_ALL_Phylo_Norm, "Genus")
CH_1_ALL_melted_otu<-otu_table(CH_1_ALL_melted_Norm)
CH_1_taxa_top10<-names(sort(taxa_sums(CH_1_ALL_melted_Norm), decreasing = TRUE)) [1:10]
CH_1_ALL_Phylo.top10<-transform_sample_counts(CH_1_ALL_melted_Norm, function(CH_1_ALL_melted_otu) CH_1_ALL_melted_otu/sum(CH_1_ALL_melted_otu))
CH_1_ALL_Phylo.top10<-prune_taxa(CH_1_taxa_top10, CH_1_ALL_Phylo.top10)
sample_data(CH_1_ALL_Phylo.top10)$Location<-factor(sample_data(CH_1_ALL_Phylo.top10)$Location, levels = c("Cranberry H2O", "French Creek H2O", "Cranberry NP", "French Creek NP"))
CH_1_Top10<-plot_bar(CH_1_ALL_Phylo.top10, x = "Location", fill = "Genus") + scale_fill_brewer(palette = "Paired") + theme_classic()
## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
CH_1_Top10 + geom_bar(aes(fill=Genus), stat="identity", position = "stack") + scale_fill_brewer(palette = "Paired")
## Scale for 'fill' is already present. Adding another scale for 'fill',
## which will replace the existing scale.
write.csv(otu_table(CH_1_ALL_Phylo.top10), "CH_1_Top10_Taxa_Percent.csv")
#Offload Top20 ASV data - calculate average for each group so that y-axis = 0-1 (e.g. 0 - 100%). Also calculate SE and reupload data.
#ENV
ZOOP_Palette<-function (n)
{
x <- ramp(seq.int(0, 1, length.out = n))
if (ncol(x) == 4L)
rgb(x[, 1L], x[, 2L], x[, 3L], x[, 4L], maxColorValue = 255)
else rgb(x[, 1L], x[, 2L], x[, 3L], maxColorValue = 255)
}
CH_1_Top10_Taxa_Percent_ENV<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Taxa_Graphs/CH_1_Top10_Taxa_Percent_ENV.csv", header=TRUE, check.names = FALSE)
CH_1_Top10_Taxa_Percent_ENV$Location<-factor(CH_1_Top10_Taxa_Percent_ENV$Location, levels = c("Cranberry/H2O", "French Creek/H2O"))
colourCount<-length(unique(CH_1_Top10_Taxa_Percent_ENV$Genus))
getPalette<-colorRampPalette(brewer.pal(8, "Set1"))
CH_1_Top10_Taxa_Percent_ENV_Graph<-ggplot(CH_1_Top10_Taxa_Percent_ENV, aes(x=Location, y = Average, fill = Genus)) + geom_bar(colour = "black", stat = "identity", position = position_dodge()) + theme_classic() + scale_fill_manual(values = getPalette(colourCount)) + labs(title = "Larval Northern Pike Gut Microbiota", subtitle = "Top 10 Bacterial Genera Wetland Water Samples", x = "", y = "Relative ASV Abundance (%)")+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + geom_errorbar(aes(ymin=Average-SE, ymax = Average+SE), width=.2, position=position_dodge(.9))
CH_1_Top10_Taxa_Percent_ENV_Graph
#FISH
CH_1_Top10_Taxa_Percent_FISH<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Taxa_Graphs/CH_1_Top10_Taxa_Percent_FISH.csv", header=TRUE, check.names = FALSE)
CH_1_Top10_Taxa_Percent_FISH$Location<-factor(CH_1_Top10_Taxa_Percent_FISH$Location, levels = c("Cranberry/NP", "French Creek/NP"))
colourCount<-length(unique(CH_1_Top10_Taxa_Percent_FISH$Genus))
getPalette<-colorRampPalette(brewer.pal(8, "Set1"))
CH_1_Top10_Taxa_Percent_FISH_Graph<-ggplot(CH_1_Top10_Taxa_Percent_FISH, aes(x=Location, y = Average, fill = Genus)) + geom_bar(colour = "black", stat = "identity", position = position_dodge()) + theme_classic() + scale_fill_manual(values = getPalette(colourCount)) + labs(title = "Larval Northern Pike Gut Microbiota", subtitle = "Top 10 Bacterial Genera in Northern Pike", x = "", y = "")+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + geom_errorbar(aes(ymin=Average-SE, ymax = Average+SE), width=.2, position=position_dodge(.9))
CH_1_Top10_Taxa_Percent_FISH_Graph
#COMBINED
ggarrange(CH_1_Top10_Taxa_Percent_ENV_Graph, CH_1_Top10_Taxa_Percent_FISH_Graph, labels = c("A", "B"), common.legend = TRUE, legend = "right")
Heatmap Clustering (Top 20 Taxa - Normalized):
#NMDS Heatmap
plot_heatmap(CH_1_ALL_Phylo.top10, sample.label = "Location", "NMDS", "bray", "Genus")
## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning: Transformation introduced infinite values in discrete y-axis
###Phyloseq Object from CH.1 taxa analysis carried over###
CH_1_ALL_Phylo_Alpha<-prune_taxa(taxa_sums(CH_1_ALL_melted_Norm) > 0, CH_1_ALL_melted_Norm)
plot_richness(CH_1_ALL_Phylo_Alpha, x = "Sample", measures = c("Observed", "Chao1","Shannon")) + theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 92 rows containing missing values (geom_errorbar).
plot_richness(CH_1_ALL_Phylo_Alpha, x = "Location", measures = c("Observed", "Chao1", "Shannon")) + theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 92 rows containing missing values (geom_errorbar).
CH_1_ALL_Alpha<-estimate_richness(CH_1_ALL_Phylo, split = TRUE, measures = c("Observed", "Chao1", "Shannon"))
CH_1_ALL_Alpha$Location<-CH_1_ALL_ASV_NMDS$Location
head(CH_1_ALL_Alpha)
## Observed Chao1 se.chao1 Shannon
## H2O.041.513.718.1F_filt.fastq.gz 82 82.12500 0.4435997 2.588605
## H2O.042.515.718.1F_filt.fastq.gz 143 143.90909 1.3466632 3.268961
## H2O.043.516.718.1F_filt.fastq.gz 113 119.87500 5.5654232 2.627143
## H2O.044.517.718.1F_filt.fastq.gz 144 146.33333 2.5533155 2.510726
## H2O.045.518.719.1F_filt.fastq.gz 127 127.10000 0.3818439 2.544150
## H2O.046.520.719.1F_filt.fastq.gz 95 96.15385 1.5235758 2.707144
## Location
## H2O.041.513.718.1F_filt.fastq.gz Cranberry H2O
## H2O.042.515.718.1F_filt.fastq.gz Cranberry H2O
## H2O.043.516.718.1F_filt.fastq.gz Cranberry H2O
## H2O.044.517.718.1F_filt.fastq.gz Cranberry H2O
## H2O.045.518.719.1F_filt.fastq.gz Cranberry H2O
## H2O.046.520.719.1F_filt.fastq.gz Cranberry H2O
CH_1_ALL_Obs_PH<-posthocTGH(y = CH_1_ALL_Alpha$Observed, x = CH_1_ALL_Alpha$Location, method = "games-howell")
CH_1_ALL_Obs_PH
## n means variances
## Cranberry H2O 11 118 1052
## Cranberry NP 15 12 43
## French Creek H2O 11 116 1193
## French Creek NP 9 17 41
##
## diff ci.lo ci.hi t df p
## Cranberry NP-Cranberry H2O -105.5 -135.6 -75 10.63 11 <.01
## French Creek H2O-Cranberry H2O -2.0 -42.0 38 0.14 20 1
## French Creek NP-Cranberry H2O -101.1 -131.3 -71 10.10 11 <.01
## French Creek H2O-Cranberry NP 103.5 71.5 136 9.81 11 <.01
## French Creek NP-Cranberry NP 4.4 -3.3 12 1.62 17 .39
## French Creek NP-French Creek H2O -99.1 -131.2 -67 9.32 11 <.01
CH_1_ALL_Chao1_PH<-posthocTGH(y = CH_1_ALL_Alpha$Chao1, x = CH_1_ALL_Alpha$Location, method = "games-howell")
CH_1_ALL_Chao1_PH
## n means variances
## Cranberry H2O 11 120 1029
## Cranberry NP 15 12 43
## French Creek H2O 11 119 1128
## French Creek NP 9 17 41
##
## diff ci.lo ci.hi t df p
## Cranberry NP-Cranberry H2O -107.06 -136.8 -77 10.900 11 <.01
## French Creek H2O-Cranberry H2O -0.64 -39.8 39 0.045 20 1
## French Creek NP-Cranberry H2O -102.64 -132.5 -73 10.360 11 <.01
## French Creek H2O-Cranberry NP 106.42 75.3 138 10.364 11 <.01
## French Creek NP-Cranberry NP 4.42 -3.3 12 1.619 17 .39
## French Creek NP-French Creek H2O -102.00 -133.2 -71 9.855 11 <.01
CH_1_ALL_Shannon_PH<-posthocTGH(y = CH_1_ALL_Alpha$Shannon, x = CH_1_ALL_Alpha$Location, method = "games-howell")
CH_1_ALL_Shannon_PH
## n means variances
## Cranberry H2O 11 2.7 0.12
## Cranberry NP 15 1.0 0.22
## French Creek H2O 11 2.8 0.11
## French Creek NP 9 1.1 0.29
##
## diff ci.lo ci.hi t df p
## Cranberry NP-Cranberry H2O -1.681 -2.12 -1.24 10.48 24 <.01
## French Creek H2O-Cranberry H2O 0.079 -0.32 0.48 0.55 20 .94
## French Creek NP-Cranberry H2O -1.612 -2.22 -1.01 7.80 13 <.01
## French Creek H2O-Cranberry NP 1.760 1.32 2.20 11.15 24 <.01
## French Creek NP-Cranberry NP 0.068 -0.55 0.69 0.32 15 .99
## French Creek NP-French Creek H2O -1.692 -2.29 -1.09 8.27 13 <.01
Cran NP (Normalized):
CH_1_CranNP<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_CranNP.csv", header = TRUE, check.names = FALSE)
CH_1_CranNP.names<-make.names(CH_1_CranNP$Genus, unique = TRUE)
rownames(CH_1_CranNP)<-CH_1_CranNP.names
CH_1_CranNP$Genus<-NULL
CH_1_CranNP_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_CranNP_metadata.csv", header=TRUE, check.names = FALSE, row.names = 1)
CH_1_CranNP_metadata_Phylo<-sample_data(CH_1_CranNP_metadata)
CH_1_CranNP_Phylo<-as.matrix(CH_1_CranNP)
CH_1_CranNP_Phylo<-otu_table(CH_1_CranNP_Phylo, taxa_are_rows = TRUE)
CH_1_CranNP_Phylo<-phyloseq(CH_1_CranNP_Phylo, CH_1_CranNP_metadata_Phylo)
CH_1_CranNP_rel<-transform(CH_1_CranNP_Phylo, "compositional")
CH_1_CranNP_Prevalence<-prevalence(CH_1_CranNP_rel, detection = 1/100, sort = TRUE)
CH_1_CranNP_Prevalence[1:50]
## Cetobacterium Plesiomonas
## 0.86666667 0.86666667
## Aeromonas Clostridium_sensu_stricto_1
## 0.73333333 0.66666667
## Paraclostridium Epulopiscium
## 0.33333333 0.26666667
## Terrisporobacter Clostridium_sensu_stricto_13
## 0.26666667 0.20000000
## Macellibacteroides Romboutsia
## 0.13333333 0.13333333
## Turicibacter Neisseria
## 0.06666667 0.06666667
## Porphyrobacter Rubellimicrobium
## 0.06666667 0.06666667
## Chromobacterium Luteolibacter
## 0.06666667 0.06666667
## Polynucleobacter Vibrio
## 0.06666667 0.06666667
## Limnohabitans SBZC.1223
## 0.06666667 0.00000000
## GOUTB8 Macromonas
## 0.00000000 0.00000000
## Aquincola Nannocystis
## 0.00000000 0.00000000
## Rhizomicrobium Anaerofustis
## 0.00000000 0.00000000
## Dendrosporobacter Rhodocyclus
## 0.00000000 0.00000000
## Methylovorus Candidatus_Paceibacter
## 0.00000000 0.00000000
## Quatrionicoccus Mongoliicoccus
## 0.00000000 0.00000000
## Candidatus_Flaviluna Fimbriiglobus
## 0.00000000 0.00000000
## Anabaena_BECID22 Alpinimonas
## 0.00000000 0.00000000
## Minicystis Cereibacter
## 0.00000000 0.00000000
## Candidatus_Limnoluna Spongiimonas
## 0.00000000 0.00000000
## Spongiivirga Salinirepens
## 0.00000000 0.00000000
## Kinneretia Mycoplana
## 0.00000000 0.00000000
## Ruminiclostridium_1 Asaccharospora
## 0.00000000 0.00000000
## Nitrospira Synechococcus_MBIC10613
## 0.00000000 0.00000000
## Microbacterium Rickettsia
## 0.00000000 0.00000000
FC Low NP (Normalized):
CH_1_FCLOW_NP<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_NP.csv", header = TRUE, check.names = FALSE)
CH_1_FCLOW_NP.names<-make.names(CH_1_FCLOW_NP$Genus, unique = TRUE)
rownames(CH_1_FCLOW_NP)<-CH_1_FCLOW_NP.names
CH_1_FCLOW_NP$Genus<-NULL
CH_1_FCLOW_NP$Genus<-NULL
CH_1_FC_LOW_NP_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_NP_metadata.csv", header=TRUE, check.names = FALSE, row.names = 1)
CH_1_FC_LOW_NP_metadata_Phylo<-sample_data(CH_1_FC_LOW_NP_metadata)
CH_1_FCLOW_NP_Phylo<-as.matrix(CH_1_FCLOW_NP)
CH_1_FCLOW_NP_Phylo<-otu_table(CH_1_FCLOW_NP_Phylo, taxa_are_rows = TRUE)
CH_1_FCLOW_NP_Phylo<-phyloseq(CH_1_FCLOW_NP_Phylo, CH_1_FC_LOW_NP_metadata_Phylo)
CH_1_FCLOW_NP_rel<-transform(CH_1_FCLOW_NP_Phylo, "compositional")
CH_1_FCLOW_NP_Prevalence<-prevalence(CH_1_FCLOW_NP_rel, detection = 1/100, sort = TRUE)
CH_1_FCLOW_NP_Prevalence[1:50]
## Clostridium_sensu_stricto_1 Plesiomonas
## 0.8888889 0.7777778
## Aeromonas Romboutsia
## 0.6666667 0.4444444
## Terrisporobacter Paraclostridium
## 0.4444444 0.4444444
## Epulopiscium Brevinema
## 0.3333333 0.3333333
## Clostridium_sensu_stricto_13 Cellulosilyticum
## 0.2222222 0.1111111
## Hymenobacter Neisseria
## 0.1111111 0.1111111
## Silvanigrella Mailhella
## 0.1111111 0.1111111
## Cetobacterium Mycoplasma
## 0.1111111 0.1111111
## SBZC.1223 GOUTB8
## 0.0000000 0.0000000
## Macromonas Aquincola
## 0.0000000 0.0000000
## Nannocystis Rhizomicrobium
## 0.0000000 0.0000000
## Anaerofustis Dendrosporobacter
## 0.0000000 0.0000000
## Rhodocyclus Methylovorus
## 0.0000000 0.0000000
## Candidatus_Paceibacter Quatrionicoccus
## 0.0000000 0.0000000
## Mongoliicoccus Candidatus_Flaviluna
## 0.0000000 0.0000000
## Fimbriiglobus Anabaena_BECID22
## 0.0000000 0.0000000
## Alpinimonas Minicystis
## 0.0000000 0.0000000
## Cereibacter Candidatus_Limnoluna
## 0.0000000 0.0000000
## Spongiimonas Spongiivirga
## 0.0000000 0.0000000
## Salinirepens Kinneretia
## 0.0000000 0.0000000
## Mycoplana Ruminiclostridium_1
## 0.0000000 0.0000000
## Asaccharospora Nitrospira
## 0.0000000 0.0000000
## Synechococcus_MBIC10613 Microbacterium
## 0.0000000 0.0000000
## Rickettsia AAP99
## 0.0000000 0.0000000
## Fonticella Parachlamydia
## 0.0000000 0.0000000
CRAN(Graph):
CRAN_H2O_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_CRAN_H2O_Log2.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : incomplete final line found by readTableHeader on '~/Dropbox/
## bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/
## CH_1_CRAN_H2O_Log2.csv'
CRAN_NP_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_Cran_NP_Log2.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : incomplete final line found by readTableHeader on '~/Dropbox/
## bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/
## CH_1_Cran_NP_Log2.csv'
CRAN_H2O_Log2_Graph<-ggplot(data = CRAN_H2O_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 0.6) + labs(title = "Environmental Samples", subtitle = "Cranberry Creek", y = "ASV Abundance(%)", x = "") + theme_classic()
CRAN_NP_Log2_Graph<-ggplot(data = CRAN_NP_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 60) + labs(title = "Core Gut Microbiome Selection in Larval Pike", subtitle = "Cranberry Creek", y = "", x = "") + theme_classic()
CRAN_Log2_FINAL<-ggarrange(CRAN_H2O_Log2_Graph, CRAN_NP_Log2_Graph, labels = c("A", "B"), common.legend = TRUE, legend = "right")
CRAN_Log2_FINAL
FC Low (Graph):
FCLOW_H2O_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_H2O_Log2.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : incomplete final line found by readTableHeader on '~/Dropbox/
## bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/
## CH_1_FCLOW_H2O_Log2.csv'
FCLOW_NP_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_NP_Log2.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : incomplete final line found by readTableHeader on '~/Dropbox/
## bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/
## CH_1_FCLOW_NP_Log2.csv'
FCLOW_H2O_Log2_Graph<-ggplot(data = FCLOW_H2O_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 0.5) + labs(title = "Environmental Samples", subtitle = "Lower French Creek", y = "ASV Abundance(%)", x = "") + theme_classic()
FCLOW_NP_Log2_Graph<-ggplot(data = FCLOW_NP_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 50) + labs(title = "Core Gut Microbiome Selection in Larval Pike", subtitle = "Lower French Creek", y = "", x = "") + theme_classic()
FCLOW_Log2_FINAL<-ggarrange(FCLOW_H2O_Log2_Graph, FCLOW_NP_Log2_Graph, labels = c("A", "B"), common.legend = TRUE, legend = "right")
FCLOW_Log2_FINAL
Comparing between Core Gut Microbiotas (Environments + NP) Graphs:
CH_1_ALL_ENV_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_ENV_COMPARE_ALL_MERGE_Log2.csv", header = TRUE, check.names = FALSE)
CH_1_ALL_ENV_Log2_Graph<-ggplot(data = CH_1_ALL_ENV_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 0.2) + labs(title = "Environmental Samples", subtitle = "All Sites", y = "ASV Abundance(%)", x = "") + theme_classic()
CH_1_ALL_NP_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_NP_ALL_MERGE_Log2.csv", header = TRUE, check.names = FALSE)
CH_1_ALL_NP_Log2_Graph<-ggplot(data = CH_1_ALL_NP_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + labs(title = "Core Gut Microbiome Selection in Juvenile Pike", subtitle = "All Sites", x = "", y = "") + theme_classic()+ geom_text(aes(label = "Core", x = 0.66, y = 22), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 0.89, y = 51), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 1.12, y = 10), color = "black", size = 4) + geom_text(aes(label = "Core", x = 1.34, y = 30), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 1.66, y = 18), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 2.11, y = 37), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 2.34, y = 11), color = "black", size = 4)
Core_FINAL<-ggarrange(CH_1_ALL_ENV_Log2_Graph, CH_1_ALL_NP_Log2_Graph, labels = c("A", "B"), common.legend = TRUE, legend = "right")
Core_FINAL
Determining significant effects on Core microbiota abundances between locations:
#All NP samples were grouped in one file with core microbiota genera and location of samples. Absolute abundance was determined by taking read count and dividing by 10,835 (Normalized depth)
CH_1_NP_compare<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_NP_Core_Compare.csv", header=TRUE, check.names = FALSE)
hist(CH_1_NP_compare$Abundance)
CH_1_Core_Abundance<-CH_1_NP_compare$Abundance
CH_1_Core_Abundance__scaled<-(CH_1_Core_Abundance-min(CH_1_Core_Abundance) + 0.000001/max(CH_1_Core_Abundance) + 0.000002)
CH_1_Core_fitbeta<-fitdist(CH_1_Core_Abundance__scaled, "beta")
CH_1_Core_fitbeta
## Fitting of the distribution ' beta ' by maximum likelihood
## Parameters:
## estimate Std. Error
## shape1 0.2921962 0.0335308
## shape2 1.4286120 0.2501722
plot(CH_1_Core_fitbeta)
cdfcomp(CH_1_Core_fitbeta, legendtext = "beta")
CH_1_core_beta_dist<-betareg(CH_1_Core_Abundance__scaled ~ Location*Genus, data=CH_1_NP_compare)
summary(CH_1_core_beta_dist)
##
## Call:
## betareg(formula = CH_1_Core_Abundance__scaled ~ Location * Genus,
## data = CH_1_NP_compare)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -3.8515 -0.4506 0.2163 0.6563 1.8786
##
## Coefficients (mean model with logit link):
## Estimate
## (Intercept) -1.75005
## LocationFrench Creek/NP -0.03124
## GenusCetobacterium 1.11021
## GenusClostridium_sensu_stricto_1 -0.31046
## GenusPlesiomonas 0.25243
## LocationFrench Creek/NP:GenusCetobacterium -2.17527
## LocationFrench Creek/NP:GenusClostridium_sensu_stricto_1 1.02440
## LocationFrench Creek/NP:GenusPlesiomonas -0.54156
## Std. Error
## (Intercept) 0.29483
## LocationFrench Creek/NP 0.45058
## GenusCetobacterium 0.39425
## GenusClostridium_sensu_stricto_1 0.38948
## GenusPlesiomonas 0.39090
## LocationFrench Creek/NP:GenusCetobacterium 0.63875
## LocationFrench Creek/NP:GenusClostridium_sensu_stricto_1 0.63872
## LocationFrench Creek/NP:GenusPlesiomonas 0.63689
## z value
## (Intercept) -5.936
## LocationFrench Creek/NP -0.069
## GenusCetobacterium 2.816
## GenusClostridium_sensu_stricto_1 -0.797
## GenusPlesiomonas 0.646
## LocationFrench Creek/NP:GenusCetobacterium -3.406
## LocationFrench Creek/NP:GenusClostridium_sensu_stricto_1 1.604
## LocationFrench Creek/NP:GenusPlesiomonas -0.850
## Pr(>|z|)
## (Intercept) 0.00000000292 ***
## LocationFrench Creek/NP 0.94472
## GenusCetobacterium 0.00486 **
## GenusClostridium_sensu_stricto_1 0.42539
## GenusPlesiomonas 0.51843
## LocationFrench Creek/NP:GenusCetobacterium 0.00066 ***
## LocationFrench Creek/NP:GenusClostridium_sensu_stricto_1 0.10875
## LocationFrench Creek/NP:GenusPlesiomonas 0.39515
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2.3259 0.3626 6.414 0.000000000142 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 158.8 on 9 Df
## Pseudo R-squared: 0.286
## Number of iterations: 34 (BFGS) + 1 (Fisher scoring)
Anova(CH_1_core_beta_dist, type = 3)
## Analysis of Deviance Table (Type III tests)
##
## Response: CH_1_Core_Abundance__scaled
## Df Chisq Pr(>Chisq)
## (Intercept) 1 35.2338 0.000000002924 ***
## Location 1 0.0048 0.944717
## Genus 3 14.1875 0.002661 **
## Location:Genus 3 25.5518 0.000011836917 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Proc_core_marginal<-emmeans(CH_1_core_beta_dist, ~ Location*Genus)
pairs(Proc_core_marginal, adjust = "tukey")
## contrast
## Cranberry/NP,Aeromonas - French Creek/NP,Aeromonas
## Cranberry/NP,Aeromonas - Cranberry/NP,Cetobacterium
## Cranberry/NP,Aeromonas - French Creek/NP,Cetobacterium
## Cranberry/NP,Aeromonas - Cranberry/NP,Clostridium_sensu_stricto_1
## Cranberry/NP,Aeromonas - French Creek/NP,Clostridium_sensu_stricto_1
## Cranberry/NP,Aeromonas - Cranberry/NP,Plesiomonas
## Cranberry/NP,Aeromonas - French Creek/NP,Plesiomonas
## French Creek/NP,Aeromonas - Cranberry/NP,Cetobacterium
## French Creek/NP,Aeromonas - French Creek/NP,Cetobacterium
## French Creek/NP,Aeromonas - Cranberry/NP,Clostridium_sensu_stricto_1
## French Creek/NP,Aeromonas - French Creek/NP,Clostridium_sensu_stricto_1
## French Creek/NP,Aeromonas - Cranberry/NP,Plesiomonas
## French Creek/NP,Aeromonas - French Creek/NP,Plesiomonas
## Cranberry/NP,Cetobacterium - French Creek/NP,Cetobacterium
## Cranberry/NP,Cetobacterium - Cranberry/NP,Clostridium_sensu_stricto_1
## Cranberry/NP,Cetobacterium - French Creek/NP,Clostridium_sensu_stricto_1
## Cranberry/NP,Cetobacterium - Cranberry/NP,Plesiomonas
## Cranberry/NP,Cetobacterium - French Creek/NP,Plesiomonas
## French Creek/NP,Cetobacterium - Cranberry/NP,Clostridium_sensu_stricto_1
## French Creek/NP,Cetobacterium - French Creek/NP,Clostridium_sensu_stricto_1
## French Creek/NP,Cetobacterium - Cranberry/NP,Plesiomonas
## French Creek/NP,Cetobacterium - French Creek/NP,Plesiomonas
## Cranberry/NP,Clostridium_sensu_stricto_1 - French Creek/NP,Clostridium_sensu_stricto_1
## Cranberry/NP,Clostridium_sensu_stricto_1 - Cranberry/NP,Plesiomonas
## Cranberry/NP,Clostridium_sensu_stricto_1 - French Creek/NP,Plesiomonas
## French Creek/NP,Clostridium_sensu_stricto_1 - Cranberry/NP,Plesiomonas
## French Creek/NP,Clostridium_sensu_stricto_1 - French Creek/NP,Plesiomonas
## Cranberry/NP,Plesiomonas - French Creek/NP,Plesiomonas
## estimate SE df z.ratio p.value
## 0.00389751 0.05605564 Inf 0.070 1.0000
## -0.19724240 0.07123953 Inf -2.769 0.1029
## 0.09317049 0.03963718 Inf 2.351 0.2662
## 0.03504597 0.04439228 Inf 0.789 0.9937
## -0.10786480 0.07627127 Inf -1.414 0.8511
## -0.03473991 0.05402269 Inf -0.643 0.9983
## 0.03603587 0.04955534 Inf 0.727 0.9962
## -0.20113991 0.07611213 Inf -2.643 0.1408
## 0.08927298 0.04783958 Inf 1.866 0.5744
## 0.03114846 0.05185763 Inf 0.601 0.9989
## -0.11176232 0.08084614 Inf -1.382 0.8656
## -0.03863742 0.06031115 Inf -0.641 0.9983
## 0.03213835 0.05634056 Inf 0.570 0.9992
## 0.29041289 0.06462969 Inf 4.493 0.0002
## 0.23228837 0.06790828 Inf 3.421 0.0145
## 0.08937760 0.09201547 Inf 0.971 0.9784
## 0.16250249 0.07466301 Inf 2.176 0.3660
## 0.23327827 0.07138754 Inf 3.268 0.0241
## -0.05812452 0.03319247 Inf -1.751 0.6533
## -0.20103529 0.07043951 Inf -2.854 0.0822
## -0.12791040 0.04553002 Inf -2.809 0.0926
## -0.05713462 0.03982670 Inf -1.435 0.8413
## -0.14291077 0.07323810 Inf -1.951 0.5153
## -0.06978588 0.04967248 Inf -1.405 0.8554
## 0.00098990 0.04470948 Inf 0.022 1.0000
## 0.07312489 0.07945248 Inf 0.920 0.9842
## 0.14390067 0.07647775 Inf 1.882 0.5636
## 0.07077578 0.05433666 Inf 1.303 0.8983
##
## P value adjustment: tukey method for comparing a family of 8 estimates
cld(Proc_core_marginal, alpha = 0.05, Letters = letters, adjust = "tukey")
## Location Genus emmean SE df
## French Creek/NP Cetobacterium 0.0548698 0.01937053 Inf
## French Creek/NP Plesiomonas 0.1120045 0.03710784 Inf
## Cranberry/NP Clostridium_sensu_stricto_1 0.1129944 0.02988853 Inf
## French Creek/NP Aeromonas 0.1441428 0.04581108 Inf
## Cranberry/NP Aeromonas 0.1480403 0.03718525 Inf
## Cranberry/NP Plesiomonas 0.1827802 0.04350646 Inf
## French Creek/NP Clostridium_sensu_stricto_1 0.2559051 0.06906070 Inf
## Cranberry/NP Cetobacterium 0.3452827 0.06271771 Inf
## asymp.LCL asymp.UCL .group
## 0.00204625 0.1076934 a
## 0.01081107 0.2131978 a
## 0.03148808 0.1945006 a
## 0.01921564 0.2690700 ab
## 0.04663585 0.2494448 ab
## 0.06413778 0.3014227 ab
## 0.06757606 0.4442342 ab
## 0.17425104 0.5163144 b
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 8 estimates
## P value adjustment: tukey method for comparing a family of 8 estimates
## significance level used: alpha = 0.05
CRAN
#Combine data from CH_1_CRAN_H2O_Log2 & CH_1_CRAN_NP_Log2
#Rename .csv as CH_1_Cran_Compare.csv
CH_1_Cran_Compare<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_CRAN_COMPARE.csv", header=TRUE, check.names = FALSE)
CH_1_Cran_log2fc<-log2(CH_1_Cran_Compare$Log2)
ggplot(CH_1_Cran_Compare, aes(Genus, CH_1_Cran_log2fc), size = 10, shape = 2) + geom_point(shape=18, size = 10) + ylim(-5,16) + theme_classic()+ labs (y = "Log2-Fold Change", title = "Cranberry Creek", subtitle = "Northern Pike vs. Water Samples") + theme(axis.text.x = element_text(angle=315, vjust = -0.25, hjust = .25))+ geom_hline(yintercept = 0) + geom_hline(yintercept = -1, linetype = "dashed") + geom_hline(yintercept = 1, linetype = "dashed")
## Warning: Removed 4 rows containing missing values (geom_point).
FC LOW
#Combine data from CH_1_FCLOW_H2O_Log2 & CH_1_FCLOW_NP_Log2
#Rename .csv as CH_1_FCLOW_Compare.csv
CH_1_FCLOW_Compare<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_COMPARE.csv", header=TRUE, check.names = FALSE)
CH_1_FCLOW_log2fc<-log2(CH_1_FCLOW_Compare$Log2)
CH_1_FCLOW_Compare$Log2<-CH_1_FCLOW_log2fc
ggplot(CH_1_FCLOW_Compare, aes(Genus, CH_1_FCLOW_log2fc), size = 10, shape = 2) + geom_point(shape=18, size = 10) + ylim(0,16) + theme_classic() + labs (y = "Log2-Fold Change", title = "Lower French Creek", subtitle = "Northern Pike vs. Water Samples") + theme(axis.text.x = element_text(angle=315, vjust = -0.25, hjust = .25))+ geom_hline(yintercept = -1, linetype = "dashed") + geom_hline(yintercept = 1, linetype = "dashed")
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_hline).
Determine Microbe Functions with Tax4Fun
#Note the Mycoplasma dominated FC Low sample (NP 002) was removed as an outlier.
CH_1_Tax4Fun_ASVs<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_ASVs.csv", header = TRUE, check.names = FALSE, row.names = 1)
CH_1_Tax4Fun_ASVs_t<-as.data.frame(t(CH_1_Tax4Fun_ASVs))
CH_1_Tax4Fun_ASVs_m<-as.matrix(CH_1_Tax4Fun_ASVs_t)
CH_1_Tax4Fun_ASVs_list<-list(CH_1_Tax4Fun_ASVs_m, CH_1_taxa)
names(CH_1_Tax4Fun_ASVs_list)<-paste("name", 1:2, sep="")
tmp<-tempdir()
download_ref(tmp,reference='silva_ko',overwrite=FALSE)
CH_1_FUNCTIONS <- t4f(CH_1_Tax4Fun_ASVs_list$name1, rows_are_taxa=FALSE, tax_table = CH_1_Tax4Fun_ASVs_list$name2, reference_path = tmp, type = 'uproc', short=TRUE, cn_normalize = TRUE, sample_normalize =TRUE, drop=TRUE)
write.csv(CH_1_FUNCTIONS$fxn_table, "CH_1_FUNCTIONS_FXN_TABLE.csv")
write.csv(CH_1_FUNCTIONS$method_meta, "CH_1_FUNCTIONS_METHOD_META.csv")
write.csv(CH_1_FUNCTIONS$fxn_meta$KEGG_Description, "CH_1_FUNCTIONS_FXN_META_KEGG_DESCRIPTION.csv")
write.csv(CH_1_FUNCTIONS$fxn_meta$KEGG_Pathways, "CH_1_FUNCTIONS_FXN_META_KEGG_PATHWAYS.csv")
Function Bar Graph (Top 20 KO’s):
head(sort(colSums(CH_1_FUNCTIONS$fxn_table), decreasing = TRUE), n = 20)
## K02035 K03406 K03737 K15125 K03046 K03043
## 0.11398623 0.08763998 0.08725464 0.08086655 0.07455367 0.06786098
## K02015 K00656 K03657 K01955 K03701 K02337
## 0.06765369 0.06492334 0.06482366 0.06298126 0.06164818 0.05645908
## K01952 K02033 K03763 K01870 K03070 K01873
## 0.05636266 0.05616618 0.05383037 0.05261535 0.05107165 0.04906250
## K01872 K02032
## 0.04889534 0.04860961
###Determine's Top 20 KO Hits - Fill out CH_1_Tax4Fun_Analysis_ForR.csv with average ASV Relative abundance values (+/- SE), remove KO's with unknown KEGG pathways, and combine simialr pathways based on level 1 KEGG description (include level 2 in parentheses). For metabolic KOs, group all as "Metabolic Pathways".
#Environmental Signal Processing
CH_1_Tax4Fun_ENV_Proc<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_Analysis_Env_Sig.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on '~/
## Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/
## CH_1_Tax4Fun_Analysis_Env_Sig.csv'
CH_1_Tax4Fun_Env_Sig_Graph<-ggplot(data = CH_1_Tax4Fun_ENV_Proc, aes(x = Pathways, y =AVG, fill = Location)) + geom_bar(stat="identity", color = "black", position = position_dodge()) + labs (title = "Tax4Fun Microbial Function Predictions", subtitle = "Environmental Information Processing (Signal Transduction)", y = "Relative Gene Abundance", x = "") + theme(axis.text.y = element_text(size =7)) + geom_errorbar(aes(ymin=AVG-SE, ymax=AVG+SE), width=.2, position=position_dodge(0.9)) + theme_classic() + theme(axis.ticks.x.bottom = element_blank()) + theme(axis.text.x = element_blank()) + scale_fill_manual(values = c("darkslategray2", "darkgoldenrod4"))
CH_1_Tax4Fun_Env_Sig_Graph
#Cellular Processes
CH_1_Tax4Fun_Cell_Proc<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_Cell_Proc.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on '~/
## Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/
## CH_1_Tax4Fun_Cell_Proc.csv'
CH_1_Tax4Fun_Cell_Sig_Graph<-ggplot(data = CH_1_Tax4Fun_Cell_Proc, aes(x = Pathways, y =AVG, fill = Location)) + geom_bar(stat="identity", color = "black", position = position_dodge()) + labs (title = "Tax4Fun Microbial Function Predictions", subtitle = "Cellular Processes (Motility)", y = "", x = "") + theme(axis.text.y = element_text(size =7)) + geom_errorbar(aes(ymin=AVG-SE, ymax=AVG+SE), width=.2, position=position_dodge(0.9)) + theme_classic() + theme(axis.ticks.x.bottom = element_blank()) + theme(axis.text.x = element_blank())+ scale_fill_manual(values = c("darkslategray2", "darkgoldenrod4"))
CH_1_Tax4Fun_Cell_Sig_Graph
#Metabolic Pathways
CH_1_Tax4Fun_Meta<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_Metabolic.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on '~/
## Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/
## CH_1_Tax4Fun_Metabolic.csv'
CH_1_Tax4Fun_Metabolic_Graph<-ggplot(data = CH_1_Tax4Fun_Meta, aes(x = Pathways, y =AVG, fill = Location)) + geom_bar(stat="identity", color = "black", position = position_dodge()) + labs (title = "Tax4Fun Microbial Function Predictions", subtitle = "Metabolic Pathways", y = "Relative Gene Abundance", x = "") + theme(axis.text.y = element_text(size =7)) + geom_errorbar(aes(ymin=AVG-SE, ymax=AVG+SE), width=.2, position=position_dodge(0.9)) + theme_classic() + theme(axis.ticks.x.bottom = element_blank()) + theme(axis.text.x = element_blank())+ scale_fill_manual(values = c("darkslategray2", "darkgoldenrod4"))
CH_1_Tax4Fun_Metabolic_Graph
#Genetic Information Processing
CH_1_Tax4Fun_Gen<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_Genetic_Proc.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on '~/
## Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/
## CH_1_Tax4Fun_Genetic_Proc.csv'
CH_1_Tax4Fun_Gen_Graph<-ggplot(data = CH_1_Tax4Fun_Gen, aes(x = Pathways, y =AVG, fill = Location)) + geom_bar(stat="identity", color = "black", position = position_dodge()) + labs (title = "Tax4Fun Microbial Function Predictions", subtitle = "Genetic Information Processing (Replication/Repair or Transcription", y = "", x = "") + theme(axis.text.y = element_text(size =7)) + geom_errorbar(aes(ymin=AVG-SE, ymax=AVG+SE), width=.2, position=position_dodge(0.9)) + theme_classic() + theme(axis.ticks.x.bottom = element_blank()) + theme(axis.text.x = element_blank())+ scale_fill_manual(values = c("darkslategray2", "darkgoldenrod4"))
CH_1_Tax4Fun_Gen_Graph
#ALL
ggarrange(CH_1_Tax4Fun_Env_Sig_Graph, CH_1_Tax4Fun_Cell_Sig_Graph, CH_1_Tax4Fun_Metabolic_Graph, CH_1_Tax4Fun_Gen_Graph, ncol=2, nrow=2, common.legend = TRUE, legend = c("right"))
Significance per KO between Locations
#Export Data for Top KO's that map to a specified pathway
#File name = CH_2_KO_Env_Processing.csv
CH_1_Tax4Fun_Significance<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_significance.csv")
CH_1_T4F_CranNP<-subset(CH_1_Tax4Fun_Significance, Location == "Cranberry NP")
CH_1_T4F_FCLOW_NP<-subset(CH_1_Tax4Fun_Significance, Location == "French Creek NP")
#Environmental Information Processing (Signal Transduction)
#Welch's Two Sample t-test (Location vs. Pathway)
t_test_Env<-t.test(CH_1_T4F_CranNP$Environmental, CH_1_T4F_FCLOW_NP$Environmental, var.equal = FALSE, paired=FALSE)
t_test_Env
##
## Welch Two Sample t-test
##
## data: CH_1_T4F_CranNP$Environmental and CH_1_T4F_FCLOW_NP$Environmental
## t = -4.3783, df = 21.561, p-value = 0.0002493
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.004572976 -0.001630908
## sample estimates:
## mean of x mean of y
## 0.004616423 0.007718365
#Cellular Processes (Motility)
#Welch's Two Sample t-test (Location vs. Pathway)
t_test_Cell<-t.test(CH_1_T4F_CranNP$Cellular, CH_1_T4F_FCLOW_NP$Cellular, var.equal = FALSE, paired=FALSE)
t_test_Cell
##
## Welch Two Sample t-test
##
## data: CH_1_T4F_CranNP$Cellular and CH_1_T4F_FCLOW_NP$Cellular
## t = -4.3783, df = 21.561, p-value = 0.0002493
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.004572976 -0.001630908
## sample estimates:
## mean of x mean of y
## 0.004616423 0.007718365
#Metabolic Pathways
#Welch's Two Sample t-test (Location vs. Pathway)
t_test_Metab<-t.test(CH_1_T4F_CranNP$Metabolic, CH_1_T4F_FCLOW_NP$Metabolic, var.equal = FALSE, paired=FALSE)
t_test_Metab
##
## Welch Two Sample t-test
##
## data: CH_1_T4F_CranNP$Metabolic and CH_1_T4F_FCLOW_NP$Metabolic
## t = 0.89809, df = 10.545, p-value = 0.3892
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.006729713 0.015925050
## sample estimates:
## mean of x mean of y
## 0.02432483 0.01972717
#Genetic Information Processing (Repair/Replication or Transcription)
#Welch's Two Sample t-test (Location vs. Pathway)
t_test_Gen<-t.test(CH_1_T4F_CranNP$Genetic, CH_1_T4F_FCLOW_NP$Genetic, var.equal = FALSE, paired=FALSE)
t_test_Gen
##
## Welch Two Sample t-test
##
## data: CH_1_T4F_CranNP$Genetic and CH_1_T4F_FCLOW_NP$Genetic
## t = -0.14495, df = 8.6172, p-value = 0.8881
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01332309 0.01172871
## sample estimates:
## mean of x mean of y
## 0.01505936 0.01585655